* Creates results for Table 5

use "$savedata/masterdata_09onwards.dta", clear

egen vol_trust = sum(one), by(pconsult trust_num)

* Doctor FEs

foreach x in 10 25 50 100{
egen vol`x' = sum(one) if akm_set_mainspef`x'==1, by(pconsult)	
gen sample`x'=0
replace sample`x'=1 if akm_set_mainspef`x'==1 & vol_trust>=`x'
}

*/

keep if sample25==1
gen vol = vol25

preserve

reghdfe survive365 c.prevyear_cost, absorb(i.sex##i.derv_age i.black i.mixed i.chinese i.asian i.race_miss i.ynch* i.prevyear_stroke i.di1 i.di2 i.di3 i.di4 i.di5 i.shock i.arythmia i.arthero i.arrest i.dow##i.admidate_mont##i.finyear i.hyid i.doctor_id, savefe) keepsingleton

* save hospital FE
gen hospfe = __hdfe19__

* Predict outcomes without hospital FE
reghdfe survive365 c.prevyear_cost, absorb(i.sex##i.derv_age i.black i.mixed i.chinese i.asian i.race_miss i.ynch* i.prevyear_stroke i.di1 i.di2 i.di3 i.di4 i.di5 i.shock i.arythmia i.arthero i.arrest, savefe) keepsingleton
predict pred1, xb

collapse (mean) pred1 survive365 all_death365 hospfe (sum) one, by(doctor_id trust_num trust_code)
drop if doctor_id==.

* Sort doctors by the hospitals in which they do the most procedures - want to compare the two with the most 
gen minusone = -one

sort doctor_id minusone

bys doctor_id: gen x=_n
egen xx=max(x), by(doctor_id)

* Keep only doctors working in multiple hospitals (they are the people we can compare)
gen multiple = 0
replace multiple = 1 if xx>1

keep if multiple==1

* For simplicitly let's just compare across two different hospitals
keep if x<3

gen survive_rate1 = survive365 if x==1
gen survive_rate2 = survive365 if x==2
egen s1 = max(survive_rate1), by(doctor_id)
egen s2 = max(survive_rate2), by(doctor_id)

gen pred_survive1 = pred1 + hospfe if x==1
gen pred_survive2 = pred1 + hospfe if x==2
egen ps1 = max(pred_survive1), by(doctor_id)
egen ps2 = max(pred_survive2), by(doctor_id)

gen diff1 = s1 - ps1
gen diff2 = s2 - ps2

xtile d1_4 = diff1, n(4)
xtile d2_4 = diff2, n(4)

tab d1_4 d2_4, row nofreq

* For bootstrap

foreach x in 1 2 3 4{
count if d1_4==`x'
gen x`x'=`r(N)'
}

	foreach row in 1 2 3 4{
		foreach col in 1 2 3 4{
			
		count if d1_4==`row' & d2_4==`col'
		gen cell`row'`col' = `r(N)'
		gen freq`row'`col' = cell`row'`col' / x`row'
		su freq`row'`col'
		matrix obs_f`row'`col' = `r(mean)'
		}
	}
	
restore	

capture program drop myboot2

program define myboot2, rclass

* draw my bootstrap sample
preserve
bsample, strata(doctor_id)

reghdfe survive365 c.prevyear_cost, absorb(i.sex##i.derv_age i.black i.mixed i.chinese i.asian i.race_miss i.ynch* i.prevyear_stroke i.di1 i.di2 i.di3 i.di4 i.di5 i.shock i.arythmia i.arthero i.arrest i.dow##i.admidate_mont##i.finyear i.hyid i.doctor_id, savefe) keepsingleton

gen hospfe = __hdfe19__

reghdfe survive365 c.prevyear_cost, absorb(i.sex##i.derv_age i.black i.mixed i.chinese i.asian i.race_miss i.ynch* i.prevyear_stroke i.di1 i.di2 i.di3 i.di4 i.di5 i.shock i.arythmia i.arthero i.arrest, savefe) keepsingleton
predict pred1, xb

collapse (mean) pred1 survive365 all_death365 hospfe (sum) one, by(doctor_id trust_num trust_code)
drop if doctor_id==.

* Sort doctors by the hospitals in which they do the most procedures - want to compare the two with the most 
gen minusone = -one

sort doctor_id minusone

bys doctor_id: gen x=_n
egen xx=max(x), by(doctor_id)

* Keep only doctors working in multiple hospitals (they are the people we can compare)
gen multiple = 0
replace multiple = 1 if xx>1

keep if multiple==1

* TFor simplicitly let's just compare across two different hospitals
keep if x<3

gen survive_rate1 = survive365 if x==1
gen survive_rate2 = survive365 if x==2
egen s1 = max(survive_rate1), by(doctor_id)
egen s2 = max(survive_rate2), by(doctor_id)

gen pred_survive1 = pred1 + hospfe if x==1
gen pred_survive2 = pred1 + hospfe if x==2
egen ps1 = max(pred_survive1), by(doctor_id)
egen ps2 = max(pred_survive2), by(doctor_id)

gen diff1 = s1 - ps1
gen diff2 = s2 - ps2

xtile d1_4 = diff1, n(4)
xtile d2_4 = diff2, n(4)

tab d1_4 d2_4, row nofreq

* For bootstrap

foreach x in 1 2 3 4{
count if d1_4==`x'
gen x`x'=`r(N)'
}

	foreach row in 1 2 3 4{
		foreach col in 1 2 3 4{
			
		count if d1_4==`row' & d2_4==`col'
		gen cell`row'`col' = `r(N)'
		gen freq`row'`col' = cell`row'`col' / x`row'
		su freq`row'`col'
		return scalar d_f`row'`col' = `r(mean)'
		}
	}

restore

end


** Then run bootstrap here

#delimit ;
* Simulate 199 times to test;
simulate diff_f11 = r(d_f11) diff_f12 = r(d_f12) diff_f13 = r(d_f13) diff_f14 = r(d_f14) 
diff_f21 = r(d_f21) diff_f22 = r(d_f22) diff_f23 = r(d_f23) diff_f24 = r(d_f24) 
diff_f31 = r(d_f31) diff_f32 = r(d_f32) diff_f33 = r(d_f33) diff_f34 = r(d_f34) 
diff_f41 = r(d_f41) diff_f42 = r(d_f42) diff_f43 = r(d_f43) diff_f44 = r(d_f44) 
, reps(199) seed(32786105): myboot2;


#delimit ;
* Boostrap;
bstat, stat(obs_f11, obs_f12, obs_f13, obs_f14,
obs_f21, obs_f22, obs_f23, obs_f24,
obs_f31, obs_f32, obs_f33, obs_f34,
obs_f41, obs_f42, obs_f43, obs_f44);

#delimit cr
* Output results
putexcel set "$results/Table5.xlsx", replace
putexcel A1 = "Variable"
putexcel B1 = "Coefficient"
putexcel C1 = "Std. error"

matrix b = e(b)'
putexcel A2 = matrix(b), rownames

matrix se = e(se)'
putexcel C2 = matrix(se)
